package com.yehongyu.mytools.poidoc;

import java.io.IOException;

import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.xmlbeans.XmlException;

/**
 * Static helpers for reading Word documents through Apache POI.
 */
public class WordUtil {

	/**
	 * Extracts the plain text of a Word 2007 (OOXML, {@code .docx}) document.
	 *
	 * <p>The underlying OPC package is always released before this method
	 * returns, whether extraction succeeds or fails, so no file handle is
	 * left open on the document.
	 *
	 * @param fileName path of the {@code .docx} file to read
	 * @return the text produced by {@link XWPFWordExtractor#getText()}
	 * @throws IOException if the file cannot be opened or read
	 * @throws OpenXML4JException if the file is not a valid OPC package
	 * @throws XmlException if the document XML cannot be parsed
	 */
	public static String extractTextFromDOC2007(String fileName)
			throws IOException, OpenXML4JException, XmlException {
		OPCPackage opcPackage = POIXMLDocument.openPackage(fileName);
		try {
			POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
			return extractor.getText();
		} finally {
			// revert() closes the package WITHOUT saving. The package was opened
			// by path (read-write by default), so close() would rewrite the
			// source file; this method only reads, so discard and release.
			opcPackage.revert();
		}
	}

}
